package com.momo.demo10_txt;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.junit.Before;
import org.junit.Test;

import com.momo.utils.LuceneUtils;

/**
 * Indexes the {@code .txt} files of a directory with Lucene and searches the
 * resulting index. The class doubles as a JUnit test: {@link #init()} opens
 * the index before each test, {@link #createIndex()} builds the index and
 * {@link #searchIndex()} queries it.
 */
public class IndexFile {

	private Directory directory;

	private String indexPath = "D://lucene/index"; // directory the index files are written to

	private String dirPath = "D://lucene/data"; // directory holding the txt resources

	private IndexWriter indexWriter;

	/**
	 * Lists the regular {@code .txt} files directly inside a directory.
	 *
	 * @param dirPath path of the directory to scan (not recursive)
	 * @return the matching files; empty when the path does not exist, is not
	 *         a directory or cannot be read
	 */
	public List<File> getFileList(String dirPath) {
		List<File> fileList = new ArrayList<File>();
		File[] files = new File(dirPath).listFiles();
		if (files == null) {
			// listFiles() returns null for a missing/unreadable/non-directory path.
			return fileList;
		}
		for (File file : files) {
			// Skip sub-directories that merely happen to be named "*.txt".
			if (file.isFile() && isTxtFile(file.getName())) {
				fileList.add(file);
			}
		}
		return fileList;
	}

	/**
	 * Tells whether a file name has the {@code .txt} extension.
	 *
	 * @param fileName the bare file name to test
	 * @return {@code true} when the name ends with ".txt" (case-insensitive)
	 *         and has at least one character before the extension
	 */
	public boolean isTxtFile(String fileName) {
		String extension = ".txt";
		if (fileName == null || fileName.length() <= extension.length()) {
			return false;
		}
		// Compare the suffix only, so names such as "a.txt.bak" are rejected.
		int offset = fileName.length() - extension.length();
		return fileName.regionMatches(true, offset, extension, 0, extension.length());
	}

	/**
	 * Converts a file into a Lucene {@link Document} with the fields
	 * {@code filename}, {@code content}, {@code size} and {@code path}.
	 *
	 * @param file the text file to convert
	 * @return the populated document
	 * @throws Exception if the file content cannot be read
	 */
	public Document fileToDocument(File file) throws Exception {
		Document document = new Document();
		document.add(new Field("filename", file.getName(), Store.YES, Index.ANALYZED));
		document.add(new Field("content", getFileContent(file), Store.YES, Index.ANALYZED));
		// file.length() is the size of the file itself; getTotalSpace() would
		// report the size of the whole partition the file lives on.
		document.add(new Field("size", String.valueOf(file.length()), Store.YES, Index.ANALYZED));
		document.add(new Field("path", file.getPath(), Field.Store.YES, Field.Index.NO));
		return document;
	}

	/**
	 * Creates an {@link IndexWriter} on the given directory using the shared
	 * analyzer from {@link LuceneUtils}.
	 *
	 * @param dir the directory that stores the index
	 * @return a new writer; the caller is responsible for closing it
	 * @throws Exception if the writer cannot be opened
	 */
	public IndexWriter getIndexWriter(Directory dir) throws Exception {
		return new IndexWriter(dir, LuceneUtils.analyzer, MaxFieldLength.LIMITED);
	}

	/**
	 * Closes the writer opened by {@link #init()}, if any. Safe to call more
	 * than once.
	 *
	 * @throws Exception if closing the writer fails
	 */
	public void closeWriter() throws Exception {
		if (indexWriter != null) {
			try {
				indexWriter.close();
			}
			finally {
				// Drop the reference so a second call is a no-op.
				indexWriter = null;
			}
		}
	}

	/**
	 * Reads a whole file as UTF-8 text.
	 *
	 * @param file the file to read
	 * @return the lines of the file joined with {@code "\n"}; an empty string
	 *         for an empty file
	 * @throws Exception if the file cannot be opened or read
	 */
	public String getFileContent(File file) throws Exception {
		Reader reader = new InputStreamReader(new FileInputStream(file), "UTF-8");
		BufferedReader br = new BufferedReader(reader);
		try {
			StringBuilder result = new StringBuilder();
			boolean firstLine = true;
			String line;
			// Read each line exactly once; calling readLine() both in the loop
			// condition and in the body would drop every other line.
			while ((line = br.readLine()) != null) {
				if (!firstLine) {
					result.append('\n');
				}
				result.append(line);
				firstLine = false;
			}
			return result.toString();
		}
		finally {
			// Closing the BufferedReader also closes the wrapped reader/stream.
			br.close();
		}
	}

	/**
	 * Opens the index directory and the index writer before each test.
	 *
	 * @throws IllegalStateException if the index cannot be opened; the
	 *         original exception is kept as the cause
	 */
	@Before
	public void init() {
		try {
			directory = FSDirectory.open(new File(indexPath));
			indexWriter = getIndexWriter(directory);
		}
		catch (Exception e) {
			// Fail here with the real cause instead of continuing with a null
			// writer and hitting a NullPointerException later.
			throw new IllegalStateException("索引打开异常！", e);
		}
	}

	/**
	 * Builds the index: every txt file found in the data directory is
	 * converted to a document and added to the index directory.
	 *
	 * @throws Exception if a file cannot be read or the index cannot be written
	 */
	@Test
	public void createIndex() throws Exception {
		try {
			List<File> fileList = getFileList(dirPath);
			for (File file : fileList) {
				Document document = fileToDocument(file);
				indexWriter.addDocument(document);
				System.out.println("path:" + document.get("path"));
				System.out.println("filename:" + document.get("filename"));
				System.out.println("size:" + document.get("size"));
				System.err.println("content:" + document.get("content"));
			}
			// One commit for the whole batch instead of one per document.
			indexWriter.commit();
		}
		finally {
			closeWriter();
		}
	}

	/**
	 * Searches the {@code filename} and {@code content} fields for a fixed
	 * query string and prints the stored fields of up to 25 hits.
	 *
	 * @throws Exception if the index cannot be opened or the query cannot be
	 *         parsed
	 */
	@Test
	public void searchIndex() throws Exception {
		IndexSearcher indexSearcher = new IndexSearcher(FSDirectory.open(new File(indexPath)));
		try {
			QueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_30, new String[] { "filename", "content" }, LuceneUtils.analyzer);
			Query query = queryParser.parse("机器人");
			TopDocs topDocs = indexSearcher.search(query, 25);
			int count = topDocs.totalHits; // total number of matching documents
			System.out.println("总的命中数" + count);
			ScoreDoc[] scoreDocs = topDocs.scoreDocs;
			// Collect the stored fields of every returned hit.
			List<Map<String, Object>> list = new ArrayList<Map<String, Object>>();
			for (int i = 0; i < scoreDocs.length; i++) {
				int index = scoreDocs[i].doc;
				Document document = indexSearcher.doc(index);
				System.out.println("当前文件名:" + document.get("filename") + "\t匹配度得分：" + scoreDocs[i].score);
				// A fresh map per hit; sharing one map would make every list
				// entry show the values of the last hit.
				Map<String, Object> map = new HashMap<String, Object>();
				map.put("path", document.get("path"));
				map.put("filename", document.get("filename"));
				map.put("content", document.get("content"));
				map.put("size", document.get("size"));
				list.add(map);
			}

			// Print what was found.
			for (Map<String, Object> data : list) {
				System.out.println("文件路径" + data.get("path"));
				System.out.println("文件名" + data.get("filename"));
				System.out.println("文件内容" + data.get("content"));
				System.out.println("文件大小" + data.get("size"));
			}
		}
		finally {
			try {
				indexSearcher.close();
			}
			finally {
				// init() opened a writer for this test as well; release it so
				// the index write lock is not left held.
				closeWriter();
			}
		}
	}
}
